## R Script Outputs ------------------------------------------------------------
# Figure 4: Persistence in Immigration Lobbying
# Appendix Table D.3: Firms that Lobbied on Immigration in 2016 but not in 2017
# Appendix Table D.4: Firms that Lobbied on Immigration in 2017 but not in 2016
# Appendix Table D.5: Firms that Lobbied Bureaucracies on Immigration in 2017


## Instructions ----------------------------------------------------------------
# Step 1: Adjust MAIN_DIR to where README.txt is located
# Step 2: Run entire script


## IMPORTANT NOTE --------------------------------------------------------------
# To protect Orbis' proprietary data, Appendix Tables D.3, D.4, and D5 exclude 
# information on firms' NAICS industry, public status, and size. Firm names are
# ordered alphabetically.


## setup -----------------------------------------------------------------------
# clean slate
rm(list = ls())
date()

# load packages
pkg <- c("concordance",
         "tidyverse",
         "RColorBrewer", 
         "gridExtra", 
         "viridis",
         "xtable")

lapply(pkg, require, character.only = TRUE)

# set main directory
MAIN_DIR <- "~/Dropbox/Research/JOP-h1b-replication"


## load data -------------------------------------------------------------------
load(file = paste(MAIN_DIR, "data-merge-91-17-orbis-excluded.RData", sep = "/"))


## Figure 4: Persistence in Immigration Lobbying -------------------------------
# subset and create variable of interest
data.persist.sub <- data.merge %>%
  filter(year >= 1999) %>%
  select(bvd_id, 
         bvd_name, 
         year,
         n_img_rep_year, 
         est_img_expense_fy) %>%
  mutate(lob_img = ifelse(n_img_rep_year > 0, 1, 0)) 

# create placeholder for each firm-year
# helps fill in absent rows between years when firms first occurred and last existed
# NOTE: takes a while to run (around 20 minutes) on a iMac with 3.6 GHz 10-Core Intel Core i9 and 64 GB DDR4 RAM
# default below loads saved output to save computational time
first.time <- FALSE

if (first.time == TRUE) {
  ptm <- proc.time()
  data.persist.ph <- data.persist.sub %>%
    select(bvd_id, year) %>%
    group_by(bvd_id) %>% 
    complete(year = full_seq(year, 1)) %>%
    ungroup()
  proc.time() - ptm  
  
  # save placeholder df for future use to save time
  save(data.persist.ph, 
       file = paste(MAIN_DIR, "data-persist-ph.RData", sep = "/"))
  
} else {
  
  load(file = paste(MAIN_DIR, "data-persist-ph.RData", sep = "/"))
  
}

# merge 
data.persist <- left_join(data.persist.ph, 
                          data.persist.sub, 
                          by = c("bvd_id", "year"))

# create variables
data.persist <- data.persist %>%
  mutate(lob_img = ifelse(is.na(lob_img), 0, lob_img)) %>%
  arrange(bvd_id, year) %>%
  group_by(bvd_id) %>%
  mutate(lob_img_lag_1 = dplyr::lag(lob_img, 1)) %>%
  ungroup() %>%
  mutate(lob_img_lag_1 = ifelse(is.na(lob_img_lag_1), 0, lob_img_lag_1),
         persist_lob = ifelse(lob_img_lag_1 == 1 & lob_img == 1, 1, 0),
         no_lob_lag_1 = ifelse(lob_img_lag_1 == 0, 1, 0),
         no_lob = ifelse(lob_img == 0, 1, 0),
         persist_no_lob = ifelse(no_lob_lag_1 == 1 & no_lob == 1, 1, 0)) 

# summarize
data.persist.sum <- data.persist %>%
  group_by(year) %>% 
  summarize(tot_no_lob_lag_1 = sum(no_lob_lag_1, na.rm = TRUE),
            tot_lob_lag_1 = sum(lob_img_lag_1, na.rm = TRUE),
            tot_persist_lob = sum(persist_lob, na.rm = TRUE),
            tot_persist_no_lob = sum(persist_no_lob, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(ratio_persist_no_lob = tot_persist_no_lob / tot_no_lob_lag_1,
         ratio_persist_lob = tot_persist_lob / tot_lob_lag_1) %>%
  pivot_longer(cols = c(ratio_persist_lob, ratio_persist_no_lob),
               names_to = "persistence",
               values_to = "ratio") %>%
  filter(year >= 2000)

# set parameters
axis.title.size <- 12
text.size <- 4

# plot
p.persistence <- ggplot(data.persist.sum, 
                        aes(x = year, y = ratio, 
                            group = persistence, 
                            color = persistence,
                            shape = persistence)) +
  geom_point(size = 3) +
  geom_line() +
  scale_color_manual(values = c("red", "black")) +
  scale_shape_manual(values = c(19, 15)) +
  scale_y_continuous("Proportion of Firms\nPersistent in Immigration Lobbying") +
  scale_x_continuous("Year",
                     breaks = seq(2000, 2017, by = 1),
                     labels = seq(2000, 2017, by = 1),
                     limits = c(2000, 2017)) +
  theme_bw() + 
  theme(plot.title = element_text(size = axis.title.size,
                                  face = "bold",
                                  margin = margin(0, 0, 20, 0),
                                  hjust = 0.5),
        axis.title.y = element_text(size = axis.title.size + 4,
                                    margin = margin(0, 10, 0, 0)),
        axis.title.x = element_text(size = axis.title.size + 1,
                                    margin = margin(10, 0, 0, 0)),
        axis.text.y = element_text(size = axis.title.size - 2),
        axis.text.x = element_text(size = axis.title.size - 4),
        panel.grid.minor = element_blank(),
        panel.grid.major = element_blank(),
        legend.position = "none") + 
  annotate("text", 
           x = 2010, 
           y = 0.58,
           label = "Lobbying in both t-1 and t", 
           color = "red",
           hjust = 0,
           size = text.size) + 
  annotate("text", 
           x = 2010, 
           y = 0.96,
           label = "Not Lobbying in both t-1 and t", 
           color = "black",
           hjust = 0,
           size = text.size)

# save
pdf(paste(MAIN_DIR, "Figure-4.pdf", sep = "/"), 
    width = 6.3, height = 4)
p.persistence
dev.off()


## Appendix Table D.3: Firms that Lobbied on Immigration in 2016 but not in 2017 ----
# extract firms
firm.lob.16.not.17.vec <- data.persist %>% 
  filter(year == 2017 & lob_img_lag_1 == 1 & lob_img == 0) %>%
  pull(bvd_id)

# subset
# NOTE: Information on firms' NAICS industry, public status, and size are excluded 
# to protect Orbis' proprietary data. Firm names are ordered alphabetically.
firm.lob.16.not.17.df <- data.merge %>%
  filter(bvd_id %in% firm.lob.16.not.17.vec) %>%
  filter(year == 2017) %>%
  select(bvd_name,
         #naics_core, 
         #naics_core_descrip,
         #public,
         #size
         ) %>%
  # mutate(naics_2d = str_sub(naics_core, 1, 2),
  #        naics_2d = ifelse(naics_2d == "31" | naics_2d == "32" | naics_2d == "33", "31-33", naics_2d),
  #        naics_2d_desc = get_desc(naics_2d, "NAICS2017"),
  #        public = ifelse(public == 1, "Yes", "No")) %>%
  # select(bvd_name, naics_2d, naics_2d_desc,
  #        public, size) %>%
  rename(`Firm Name` = bvd_name#,
         #`NAICS Code` = naics_2d,
         #`NAICS Description` = naics_2d_desc,
         #Public = public,
         #Size = size
         ) %>%
  # arrange(`NAICS Code`, Size, Public, Firm)
  arrange(`Firm Name`)
  
# summarize and save
#sink(file.path(MAIN_DIR, "Appendix-Table-D3.tex"))
sink(file.path(MAIN_DIR, "Appendix-Table-D3.html"))
print(xtable(firm.lob.16.not.17.df, 
             caption = "Firms that Lobbied on Immigration in 2016 but not in 2017",
             label = "tb:nonersistent-firms-lob-16",
             #align = "llclcc"
             align = "ll"
             ),
      size = "scriptsize",
      include.rownames = FALSE,
      #type = "latex"#,
      type = "html"
      )
sink()


## Appendix Table D.4: Firms that Lobbied on Immigration in 2017 but not in 2016 ----
# extract firms
firm.not.lob.16.lob.17.vec <- data.persist %>% 
  filter(year == 2017 & lob_img_lag_1 == 0 & lob_img == 1) %>%
  pull(bvd_id)

# subset
# NOTE: Information on firms' NAICS industry, public status, and size are excluded 
# to protect Orbis' proprietary data. Firm names are ordered alphabetically.
firm.not.lob.16.lob.17.df <- data.merge %>%
  filter(bvd_id %in% firm.not.lob.16.lob.17.vec) %>%
  filter(year == 2017) %>%
  select(bvd_name,
         #naics_core, 
         #naics_core_descrip,
         #public,
         #size
         ) %>%
  # mutate(naics_2d = str_sub(naics_core, 1, 2),
  #        naics_2d = ifelse(naics_2d == "31" | naics_2d == "32" | naics_2d == "33", "31-33", naics_2d),  
  #        naics_2d = ifelse(naics_2d == "44" | naics_2d == "45", "44-45", naics_2d),  
  #        naics_2d_desc = get_desc(naics_2d, "NAICS2017"),
  #        public = ifelse(public == 1, "Yes", "No")) %>%
  # select(bvd_name, naics_2d, naics_2d_desc, 
  #        public, size) %>%
  rename(`Firm Name` = bvd_name#,
         #`NAICS Code` = naics_2d,
         #`NAICS Description` = naics_2d_desc,
         #Public = public,
         #Size = size
         ) %>%
  mutate(`Firm Name` = ifelse(`Firm Name` == "UNIVERSAL CORP (Previous name: UNIVERSAL LEAF TOBACCO CO INC)", "UNIVERSAL CORP", `Firm Name`),
         `Firm Name` = ifelse(`Firm Name` == "ARAMARK SPORTS & ENTERTAINMENT SERVICES LLC", "ARAMARK SPORTS & ENT. SERVICES LLC", `Firm Name`)) %>%
  # arrange(`NAICS Code`, Size, Public, Firm)
  arrange(`Firm Name`)

# summarize and save
#sink(file.path(MAIN_DIR, "Appendix-Table-D4.tex"))
sink(file.path(MAIN_DIR, "Appendix-Table-D4.html"))
print(xtable(firm.not.lob.16.lob.17.df, 
             caption = "Firms that Lobbied on Immigration in 2017 but not in 2016",
             label = "tb:nonersistent-firms-lob-17",
             #align = "llclcc"
             align = "ll"
             ),
      size = "scriptsize",
      include.rownames = FALSE,
      #type = "latex"#,
      type = "html"
      )
sink()


## Table D.5: Firms that Lobbied Bureaucracies on Immigration in 2017 ----------
# subset
# NOTE: Information on firms' NAICS industry, public status, and size are excluded 
# to protect Orbis' proprietary data. Firm names are ordered alphabetically.
bureau.lob.df <- data.merge %>% 
  filter(year == 2017 & (lob_uscis_2017 == 1 | lob_dhs_2017 == 1 | lob_dol_2017 == 1 | lob_wh_eop_2017 == 1)) %>%
  select(bvd_name,
         lob_uscis_2017, lob_dol_2017, lob_dhs_2017, lob_wh_eop_2017#, 
         #naics_core, naics_core_descrip,
         #size,
         #public
         )

# prepare summary data
bureau.lob.df.l <- bureau.lob.df %>%
  mutate(bvd_name = str_remove(bvd_name, "\\.$")) %>%
  pivot_longer(
    cols = starts_with("lob"),
    values_to = "target") %>%
  filter(target == 1) %>%
  mutate(name = ifelse(name == "lob_uscis_2017", "USCIS", name),
         name = ifelse(name == "lob_dhs_2017", "DHS", name),
         name = ifelse(name == "lob_dol_2017", "DOL", name),
         name = ifelse(name == "lob_wh_eop_2017", "WH/EOP", name)) %>%
  group_by(bvd_name) %>%
  summarize(`Targeted Bureaucracy` = paste0(name, collapse = ", ")#,
            #naics_core = first(naics_core),
            #naics_core_descrip = first(naics_core_descrip),
            #size = first(size),
            #public = first(public)
            ) %>%
  # mutate(naics_2d = str_sub(naics_core, 1, 2),
  #        naics_2d = ifelse(naics_2d == "31" | naics_2d == "32" | naics_2d == "33", "31-33", naics_2d),  
  #        naics_2d = ifelse(naics_2d == "44" | naics_2d == "45", "44-45", naics_2d), 
  #        naics_2d = ifelse(naics_2d == "48" | naics_2d == "49", "48-49", naics_2d), 
  #        naics_2d_desc = get_desc(naics_2d, "NAICS2017"),
  #        public = ifelse(public == 1, "Yes", "No")) %>%
  select(bvd_name, 
         `Targeted Bureaucracy`#, 
         #naics_2d, 
         #naics_2d_desc, 
         #public, 
         #size
         ) %>%
  rename(`Firm Name` = bvd_name#,
         #`NAICS Code` = naics_2d,
         #`NAICS Description` = naics_2d_desc,
         #Public = public,
         #Size = size
         )  %>%
  mutate(`Firm Name` = ifelse(`Firm Name` == "COGNIZANT TECHNOLOGY SOLUTIONS CORP", "COGNIZANT TECH. SOLUTIONS CORP", `Firm Name`),
         `Firm Name` = ifelse(`Firm Name` == "ARAMARK SPORTS & ENTERTAINMENT SERVICES LLC", "ARAMARK SPORTS & ENT. SERVICES LLC", `Firm Name`)#,
         #`NAICS Description` = ifelse(`NAICS Description` == "Administrative and Support and Waste Management and Remediation Services", "Admin. and Support", `NAICS Description`),
         #`NAICS Description` = ifelse(`NAICS Description` == "Professional, Scientific, and Technical Services", "Professional/Scientific/Technical Serv.", `NAICS Description`)
         ) %>%
  #arrange(`NAICS Code`, Size, Public, Firm)
  arrange(`Firm Name`)

# save
#sink(file.path(MAIN_DIR, "Appendix-Table-D5.tex"))
sink(file.path(MAIN_DIR, "Appendix-Table-D5.html"))
print(xtable(bureau.lob.df.l, 
             caption = "Firms that Lobbied Bureaucracies on Immigration in 2017",
             label = "tb:firms-lob-bureau-17",
             #align = "lllclcl"
             align = "lll"
             ),
      size = "scriptsize",
      include.rownames = FALSE,
      #type = "latex"#,
      type = "html"
      )
sink()

